{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d7917323-9a3c-42ac-aed8-eef9176998e1",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 推荐系统 案例分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "69354a17-cf64-4af0-8b9f-7026e2e7cd27",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1750b9d2-6a72-4b3c-b9e7-5043a6d646d1",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## 准备数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "778c2118-1f5c-463d-af32-a2b30eb78088",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Path to the ratings CSV and the column -> NumPy dtype mapping used when loading it.\n",
    "DATA_PATH = \"./data/ratings.csv\"\n",
    "dtype = dict(\n",
    "    userId=np.int32,\n",
    "    movieId=np.int32,\n",
    "    rating=np.float32,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8900b954-04a4-4a11-aec9-67d921cefa62",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Load only the columns declared in `dtype`. They are selected by name rather than\n",
    "# by position, so the load does not silently pick the wrong columns if the CSV's\n",
    "# column order changes; a missing column raises instead.\n",
    "df = pd.read_csv(DATA_PATH, dtype=dtype, usecols=list(dtype))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b6fe4323-753a-4965-b6d8-2650b9196245",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>47</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>50</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating\n",
       "0       1        1     4.0\n",
       "1       1        3     4.0\n",
       "2       1        6     4.0\n",
       "3       1       47     5.0\n",
       "4       1       50     5.0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "298987c6-70cd-45fc-a46d-f8ae7daf6913",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## 构造用户评分矩阵\n",
    "**用户对物品的评分矩阵**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "67953878-0632-4c67-a396-608602769559",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Pivot the long ratings table into a user x movie matrix: one row per userId,\n",
    "# one column per movieId, NaN where a user has no rating for a movie.\n",
    "# pivot_table aggregates with its default (mean) if a (user, movie) pair repeats.\n",
    "rating_matrix = pd.pivot_table(\n",
    "    df,\n",
    "    values=\"rating\",\n",
    "    index=[\"userId\"],\n",
    "    columns=[\"movieId\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "eacad021-b23c-454e-8ab8-4f76426b789c",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>movieId</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>193565</th>\n",
       "      <th>193567</th>\n",
       "      <th>193571</th>\n",
       "      <th>193573</th>\n",
       "      <th>193579</th>\n",
       "      <th>193581</th>\n",
       "      <th>193583</th>\n",
       "      <th>193585</th>\n",
       "      <th>193587</th>\n",
       "      <th>193609</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>userId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>606</th>\n",
       "      <td>2.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>607</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>608</th>\n",
       "      <td>2.5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>609</th>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>610</th>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>610 rows × 9724 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "movieId  1       2       3       4       5       6       7       8       \\\n",
       "userId                                                                    \n",
       "1           4.0     NaN     4.0     NaN     NaN     4.0     NaN     NaN   \n",
       "2           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "3           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "4           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "5           4.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "...         ...     ...     ...     ...     ...     ...     ...     ...   \n",
       "606         2.5     NaN     NaN     NaN     NaN     NaN     2.5     NaN   \n",
       "607         4.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "608         2.5     2.0     2.0     NaN     NaN     NaN     NaN     NaN   \n",
       "609         3.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "610         5.0     NaN     NaN     NaN     NaN     5.0     NaN     NaN   \n",
       "\n",
       "movieId  9       10      ...  193565  193567  193571  193573  193579  193581  \\\n",
       "userId                   ...                                                   \n",
       "1           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "2           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "3           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "4           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "5           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "...         ...     ...  ...     ...     ...     ...     ...     ...     ...   \n",
       "606         NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "607         NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "608         NaN     4.0  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "609         NaN     4.0  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "610         NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "\n",
       "movieId  193583  193585  193587  193609  \n",
       "userId                                   \n",
       "1           NaN     NaN     NaN     NaN  \n",
       "2           NaN     NaN     NaN     NaN  \n",
       "3           NaN     NaN     NaN     NaN  \n",
       "4           NaN     NaN     NaN     NaN  \n",
       "5           NaN     NaN     NaN     NaN  \n",
       "...         ...     ...     ...     ...  \n",
       "606         NaN     NaN     NaN     NaN  \n",
       "607         NaN     NaN     NaN     NaN  \n",
       "608         NaN     NaN     NaN     NaN  \n",
       "609         NaN     NaN     NaN     NaN  \n",
       "610         NaN     NaN     NaN     NaN  \n",
       "\n",
       "[610 rows x 9724 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rating_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6c7d9aac-2f9f-44fb-9c4d-4b5cce931b2d",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## 构造相似度矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f0aeebcb-7ca2-433b-be19-2f268ffdc175",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# User-user similarity matrix: Pearson correlation between every pair of users.\n",
    "# DataFrame.corr() compares columns, so transpose first to put users in the columns.\n",
    "similarity = rating_matrix.transpose().corr(method=\"pearson\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b57c6dd5-bc9f-4213-864e-0cb72b9b9fd6",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>userId</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>601</th>\n",
       "      <th>602</th>\n",
       "      <th>603</th>\n",
       "      <th>604</th>\n",
       "      <th>605</th>\n",
       "      <th>606</th>\n",
       "      <th>607</th>\n",
       "      <th>608</th>\n",
       "      <th>609</th>\n",
       "      <th>610</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>userId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.079819</td>\n",
       "      <td>0.207983</td>\n",
       "      <td>0.268749</td>\n",
       "      <td>-0.291636</td>\n",
       "      <td>-0.118773</td>\n",
       "      <td>0.469668</td>\n",
       "      <td>0.918559</td>\n",
       "      <td>-0.037987</td>\n",
       "      <td>...</td>\n",
       "      <td>9.157371e-02</td>\n",
       "      <td>-1.597727e-16</td>\n",
       "      <td>-0.061503</td>\n",
       "      <td>-0.407556</td>\n",
       "      <td>-0.164871</td>\n",
       "      <td>0.066378</td>\n",
       "      <td>0.174557</td>\n",
       "      <td>0.268070</td>\n",
       "      <td>-0.175412</td>\n",
       "      <td>-0.032086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.991241</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.037796</td>\n",
       "      <td>...</td>\n",
       "      <td>-3.873468e-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.583333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.125000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.623288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.079819</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.433200</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.791334</td>\n",
       "      <td>-0.333333</td>\n",
       "      <td>-0.395092</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.569562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.207983</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.336525</td>\n",
       "      <td>0.148498</td>\n",
       "      <td>0.542861</td>\n",
       "      <td>0.117851</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.485794</td>\n",
       "      <td>...</td>\n",
       "      <td>-2.221127e-01</td>\n",
       "      <td>3.966413e-01</td>\n",
       "      <td>0.090090</td>\n",
       "      <td>-0.080296</td>\n",
       "      <td>0.400124</td>\n",
       "      <td>0.144603</td>\n",
       "      <td>0.116518</td>\n",
       "      <td>-0.170501</td>\n",
       "      <td>-0.277350</td>\n",
       "      <td>-0.043786</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.268749</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.336525</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.043166</td>\n",
       "      <td>0.158114</td>\n",
       "      <td>0.028347</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.777714</td>\n",
       "      <td>...</td>\n",
       "      <td>2.719480e-16</td>\n",
       "      <td>1.533034e-01</td>\n",
       "      <td>0.234743</td>\n",
       "      <td>0.067791</td>\n",
       "      <td>-0.364156</td>\n",
       "      <td>0.244321</td>\n",
       "      <td>0.231080</td>\n",
       "      <td>-0.020546</td>\n",
       "      <td>0.384111</td>\n",
       "      <td>0.040582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>606</th>\n",
       "      <td>0.066378</td>\n",
       "      <td>0.583333</td>\n",
       "      <td>-0.791334</td>\n",
       "      <td>0.144603</td>\n",
       "      <td>0.244321</td>\n",
       "      <td>-0.049192</td>\n",
       "      <td>0.137771</td>\n",
       "      <td>0.253582</td>\n",
       "      <td>0.572700</td>\n",
       "      <td>-0.382955</td>\n",
       "      <td>...</td>\n",
       "      <td>2.904896e-01</td>\n",
       "      <td>1.406134e-01</td>\n",
       "      <td>0.318473</td>\n",
       "      <td>0.682949</td>\n",
       "      <td>0.167062</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.114191</td>\n",
       "      <td>0.240842</td>\n",
       "      <td>0.533002</td>\n",
       "      <td>0.389185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>607</th>\n",
       "      <td>0.174557</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.333333</td>\n",
       "      <td>0.116518</td>\n",
       "      <td>0.231080</td>\n",
       "      <td>0.255639</td>\n",
       "      <td>0.402792</td>\n",
       "      <td>0.251280</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.241121</td>\n",
       "      <td>...</td>\n",
       "      <td>6.982411e-01</td>\n",
       "      <td>2.172105e-01</td>\n",
       "      <td>0.192787</td>\n",
       "      <td>0.035806</td>\n",
       "      <td>-0.299641</td>\n",
       "      <td>0.114191</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.200814</td>\n",
       "      <td>0.190117</td>\n",
       "      <td>0.106605</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>608</th>\n",
       "      <td>0.268070</td>\n",
       "      <td>-0.125000</td>\n",
       "      <td>-0.395092</td>\n",
       "      <td>-0.170501</td>\n",
       "      <td>-0.020546</td>\n",
       "      <td>0.125428</td>\n",
       "      <td>0.008081</td>\n",
       "      <td>0.434423</td>\n",
       "      <td>0.336625</td>\n",
       "      <td>-0.571043</td>\n",
       "      <td>...</td>\n",
       "      <td>4.739665e-01</td>\n",
       "      <td>2.976461e-01</td>\n",
       "      <td>0.086423</td>\n",
       "      <td>0.053986</td>\n",
       "      <td>-0.075673</td>\n",
       "      <td>0.240842</td>\n",
       "      <td>0.200814</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.488929</td>\n",
       "      <td>0.147606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>609</th>\n",
       "      <td>-0.175412</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.277350</td>\n",
       "      <td>0.384111</td>\n",
       "      <td>0.193649</td>\n",
       "      <td>0.420288</td>\n",
       "      <td>0.141860</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.885115e-01</td>\n",
       "      <td>0.343303</td>\n",
       "      <td>0.641624</td>\n",
       "      <td>-0.550000</td>\n",
       "      <td>0.533002</td>\n",
       "      <td>0.190117</td>\n",
       "      <td>0.488929</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.521773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>610</th>\n",
       "      <td>-0.032086</td>\n",
       "      <td>0.623288</td>\n",
       "      <td>0.569562</td>\n",
       "      <td>-0.043786</td>\n",
       "      <td>0.040582</td>\n",
       "      <td>0.115580</td>\n",
       "      <td>0.341233</td>\n",
       "      <td>0.167931</td>\n",
       "      <td>0.615638</td>\n",
       "      <td>-0.205081</td>\n",
       "      <td>...</td>\n",
       "      <td>7.024510e-03</td>\n",
       "      <td>4.926267e-02</td>\n",
       "      <td>0.270908</td>\n",
       "      <td>0.310611</td>\n",
       "      <td>0.462274</td>\n",
       "      <td>0.389185</td>\n",
       "      <td>0.106605</td>\n",
       "      <td>0.147606</td>\n",
       "      <td>-0.521773</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>610 rows × 610 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "userId       1         2         3         4         5         6         7    \\\n",
       "userId                                                                         \n",
       "1       1.000000       NaN  0.079819  0.207983  0.268749 -0.291636 -0.118773   \n",
       "2            NaN  1.000000       NaN       NaN       NaN       NaN -0.991241   \n",
       "3       0.079819       NaN  1.000000       NaN       NaN       NaN       NaN   \n",
       "4       0.207983       NaN       NaN  1.000000 -0.336525  0.148498  0.542861   \n",
       "5       0.268749       NaN       NaN -0.336525  1.000000  0.043166  0.158114   \n",
       "...          ...       ...       ...       ...       ...       ...       ...   \n",
       "606     0.066378  0.583333 -0.791334  0.144603  0.244321 -0.049192  0.137771   \n",
       "607     0.174557       NaN -0.333333  0.116518  0.231080  0.255639  0.402792   \n",
       "608     0.268070 -0.125000 -0.395092 -0.170501 -0.020546  0.125428  0.008081   \n",
       "609    -0.175412       NaN       NaN -0.277350  0.384111  0.193649  0.420288   \n",
       "610    -0.032086  0.623288  0.569562 -0.043786  0.040582  0.115580  0.341233   \n",
       "\n",
       "userId       8         9         10   ...           601           602  \\\n",
       "userId                                ...                               \n",
       "1       0.469668  0.918559 -0.037987  ...  9.157371e-02 -1.597727e-16   \n",
       "2            NaN       NaN  0.037796  ... -3.873468e-01           NaN   \n",
       "3            NaN       NaN       NaN  ...           NaN           NaN   \n",
       "4       0.117851       NaN  0.485794  ... -2.221127e-01  3.966413e-01   \n",
       "5       0.028347       NaN -0.777714  ...  2.719480e-16  1.533034e-01   \n",
       "...          ...       ...       ...  ...           ...           ...   \n",
       "606     0.253582  0.572700 -0.382955  ...  2.904896e-01  1.406134e-01   \n",
       "607     0.251280       NaN -0.241121  ...  6.982411e-01  2.172105e-01   \n",
       "608     0.434423  0.336625 -0.571043  ...  4.739665e-01  2.976461e-01   \n",
       "609     0.141860       NaN       NaN  ...  1.000000e+00  1.885115e-01   \n",
       "610     0.167931  0.615638 -0.205081  ...  7.024510e-03  4.926267e-02   \n",
       "\n",
       "userId       603       604       605       606       607       608       609  \\\n",
       "userId                                                                         \n",
       "1      -0.061503 -0.407556 -0.164871  0.066378  0.174557  0.268070 -0.175412   \n",
       "2      -1.000000       NaN       NaN  0.583333       NaN -0.125000       NaN   \n",
       "3       0.433200       NaN       NaN -0.791334 -0.333333 -0.395092       NaN   \n",
       "4       0.090090 -0.080296  0.400124  0.144603  0.116518 -0.170501 -0.277350   \n",
       "5       0.234743  0.067791 -0.364156  0.244321  0.231080 -0.020546  0.384111   \n",
       "...          ...       ...       ...       ...       ...       ...       ...   \n",
       "606     0.318473  0.682949  0.167062  1.000000  0.114191  0.240842  0.533002   \n",
       "607     0.192787  0.035806 -0.299641  0.114191  1.000000  0.200814  0.190117   \n",
       "608     0.086423  0.053986 -0.075673  0.240842  0.200814  1.000000  0.488929   \n",
       "609     0.343303  0.641624 -0.550000  0.533002  0.190117  0.488929  1.000000   \n",
       "610     0.270908  0.310611  0.462274  0.389185  0.106605  0.147606 -0.521773   \n",
       "\n",
       "userId       610  \n",
       "userId            \n",
       "1      -0.032086  \n",
       "2       0.623288  \n",
       "3       0.569562  \n",
       "4      -0.043786  \n",
       "5       0.040582  \n",
       "...          ...  \n",
       "606     0.389185  \n",
       "607     0.106605  \n",
       "608     0.147606  \n",
       "609    -0.521773  \n",
       "610     1.000000  \n",
       "\n",
       "[610 rows x 610 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c80042cd-2635-44ba-95ad-659ef6159f84",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Predict the rating of user 1 for item 1.\n",
    "# Step 1: collect user 1's similarity to every other user.\n",
    "similar_users = (\n",
    "    similarity[1]  # column of similarity scores against user 1\n",
    "    .drop([1])  # remove user 1's own entry\n",
    "    .dropna()  # discard users whose similarity is NaN\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "3a87ee1e-e8b1-4d75-a6b6-aba3c2cd61e6",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Keep only the positively correlated users (similarity strictly greater than 0).\n",
    "similar_users = similar_users[similar_users > 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "29fdec1b-e939-4b61-b5bf-62ca8c2cd126",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Among the positively correlated users, find those who have rated item 1.\n",
    "# Index.intersection is the explicit set operation; `index & index` is deprecated\n",
    "# as a set operation and becomes an element-wise logical AND in newer pandas.\n",
    "ids = rating_matrix[1].dropna().index.intersection(similar_users.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "44e20bcf-d101-4e2f-a677-befad230c608",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Similarity scores of the positively correlated users who also rated item 1.\n",
    "final_similar_users = similar_users.loc[ids.tolist()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "cb7b9103-7f47-41b4-99fe-fb6d9866172b",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Predict user 1's rating for item 1 from the ratings of the selected similar users.\n",
    "# NOTE(review): assumes the standard user-based CF formula\n",
    "# sum(similarity * rating) / sum(similarity) - confirm against the intended formula.\n",
    "sum_up = 0  # numerator of the rating-prediction formula\n",
    "sum_down = 0  # denominator of the rating-prediction formula\n",
    "# The loop variable is deliberately not named `similarity`: that name holds the\n",
    "# user-user similarity matrix, and rebinding it here would break re-running earlier cells.\n",
    "for sim_uid, sim_score in final_similar_users.items():\n",
    "    similar_users_rated_movies = rating_matrix.loc[sim_uid].dropna()\n",
    "    # Weight this user's rating of item 1 by their similarity to user 1.\n",
    "    sum_up += sim_score * similar_users_rated_movies.loc[1]\n",
    "    sum_down += sim_score\n",
    "\n",
    "# NaN (instead of a ZeroDivisionError) when no similar user has rated item 1.\n",
    "predict_rating = sum_up / sum_down if sum_down else np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bbc70234-1967-4de8-b8d3-45541857cfe7",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  },
  "toc-autonumbering": true
 },
 "nbformat": 4,
 "nbformat_minor": 5
}